package com.muki.corpus;

import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jms.JMSException;

import com.muki.jms.JmsSender;

/**
 * {@link SpiderParser} that scans text for {@code href} attributes and passes
 * every extracted link, tagged with this parser's topic, to a {@link JmsSender}.
 */
public class MukiUriSpiderParser implements SpiderParser {

	private static final Logger LOGGER = Logger.getLogger(MukiUriSpiderParser.class.getName());

	/**
	 * Matches an {@code href} attribute (any letter case) whose value is
	 * double-quoted, single-quoted or unquoted. Compiled once because the
	 * expression never changes between calls.
	 */
	private static final Pattern HREF_ATTRIBUTE = Pattern.compile(
			"\\s*(?i)href\\s*=\\s*(\"([^\"]*\")|'[^']*'|([^'\">\\s]+))",
			Pattern.CASE_INSENSITIVE);

	/**
	 * Matches the leading {@code href =} part of a raw attribute match. It is
	 * case-insensitive so that it agrees with {@link #HREF_ATTRIBUTE}, and
	 * anchored so that an {@code href=} inside the URL itself is left alone.
	 */
	private static final Pattern HREF_PREFIX = Pattern.compile(
			"^\\s*href\\s*=\\s*", Pattern.CASE_INSENSITIVE);

	private final String topic;
	private final JmsSender sender = new JmsSender();

	/**
	 * @param topic label appended to every link that is sent
	 */
	public MukiUriSpiderParser(String topic) {
		this.topic = topic;
	}

	/**
	 * Finds every {@code href} attribute in the given text and hands the raw
	 * match (attribute name, {@code =} and value) to {@link #handle(String)}.
	 *
	 * @param context text to scan; {@code null} is treated as containing no links
	 */
	@Override
	public void parse(String context) {
		if (context == null) {
			return;
		}
		Matcher matcher = HREF_ATTRIBUTE.matcher(context);
		while (matcher.find()) {
			handle(matcher.group());
		}
	}

	/**
	 * Reduces a raw {@code href} match (or an already bare link) to the link
	 * itself and sends {@code link + "__" + topic} through the sender. Empty
	 * links, such as the one produced by {@code href=""}, are not sent. A
	 * {@link JMSException} raised while sending is logged and not rethrown.
	 *
	 * @param link raw attribute text or bare link; {@code null} is ignored
	 */
	@Override
	public void handle(String link) {
		if (link == null) {
			return;
		}
		// The raw match may carry leading whitespace and an upper-case
		// attribute name, so remove the prefix case-insensitively and trim.
		String withoutPrefix = HREF_PREFIX.matcher(link).replaceFirst("").trim();
		// Only the enclosing quote pair is dropped; quotes and '|' characters
		// inside the value belong to the URL and are kept.
		String url = stripEnclosingQuotes(withoutPrefix).trim();
		if (url.isEmpty()) {
			return;
		}
		try {
			// The meaning of the first and third arguments is defined by
			// JmsSender.sendTextMsg, which is not visible in this file.
			sender.sendTextMsg(false, url + "__" + topic, "urls");
		} catch (JMSException e) {
			LOGGER.log(Level.WARNING,
					"Failed to send link '" + url + "' for topic '" + topic + "'", e);
		}
	}

	/** Removes one pair of matching single or double quotes surrounding the value, if present. */
	private static String stripEnclosingQuotes(String value) {
		if (value.length() >= 2) {
			char first = value.charAt(0);
			char last = value.charAt(value.length() - 1);
			if ((first == '"' || first == '\'') && first == last) {
				return value.substring(1, value.length() - 1);
			}
		}
		return value;
	}

}
